//  Program: AnanatEtAl_test_score_balanced
//  Task:    Run regressions for the test score results on a balanced panel of states

// Do not pause for -more- prompts, and close any log still open from an earlier run
set more off
capture log close

// Change the working directory to the folder that holds the data
cd *** Fill in path name here ***

// Open a plain-text log file, overwriting any previous copy
log using AnanatEtAl_test_score_balanced, text replace

// Load the merged BLS / NAEP data
use AnanatEtAl_test_score_data, clear

// Keep the balanced panel of states that have both NAEP and BLS data for all years
// (CA, CT, GA, IN, LA, MA, MI, MN, MO, NY, NC, SC, TN, TX, VA)
keep if inlist(stateno, 5, 7, 11, 15, 19, 22, 23, 24, 26, 33, 34, 41, 43, 44, 47)

// Declare the panel: stateno is the panel identifier, year is the time variable
xtset stateno year

// 2SLS regressions that use test-taker sample sizes as frequency weights.
// The pieces shared by all three models are held in locals so the specifications
// cannot drift apart. Locals only exist while the do-file runs as a whole, so
// run this section together with the definitions below.
local controls yrd* stated*
local endog_iv (seppctyrlag = ticpctyrlag)
local se_opts  vce(cluster stateno) first
local tbl_opts drop(`controls') excel dec(3)

** All Students **
// First model written to the table, so the output file is replaced here
ivregress 2sls z_all `controls' `endog_iv' ///
    [fweight=sample_size], `se_opts'
estimates store ALL
outreg2 [ALL] using test_score_balanced, `tbl_opts' replace

** Black Students **
ivregress 2sls z_black `controls' `endog_iv' ///
    [fweight=n_black], `se_opts'
estimates store BLACK
outreg2 [BLACK] using test_score_balanced, `tbl_opts'

** White Students **
ivregress 2sls z_white `controls' `endog_iv' ///
    [fweight=n_white], `se_opts'
estimates store WHITE
outreg2 [WHITE] using test_score_balanced, `tbl_opts'

** State-years with below median unemployment **
// Compute, within each year, the median of unemplag to use as the split point
bysort year: egen cutoff = pctile(unemplag), p(50)

// Keep state-years at or below their year's median.
// The !missing() guard matters when unemplag is missing for a whole year: cutoff
// is then missing too, and Stata evaluates (. <= .) as true, which would
// otherwise keep rows with unknown unemployment in the "below median" sample.
keep if unemplag <= cutoff & !missing(unemplag)

// Run the regression weighted by test-taker sample size
ivregress 2sls z_all yrd* stated* (seppctyrlag = ticpctyrlag) [fweight=sample_size], vce(cluster stateno) first
est store LOWUNEMP
outreg2 [LOWUNEMP] using test_score_balanced, drop(yrd* stated*) excel dec(3)

// Drop the subsample (and everything else in memory) before the next section reloads the data
clear all

** State-years with above median unemployment **
// Reload the data from disk so this subsample starts from the full file
use AnanatEtAl_test_score_data, clear

// Keep the balanced panel of states that have both NAEP and BLS data for all years
// (CA, CT, GA, IN, LA, MA, MI, MN, MO, NY, NC, SC, TN, TX, VA)
keep if inlist(stateno, 5, 7, 11, 15, 19, 22, 23, 24, 26, 33, 34, 41, 43, 44, 47)

// Compute, within each year, the median of unemplag to use as the split point
bysort year: egen cutoff = pctile(unemplag), p(50)

// Keep state-years strictly above their year's median.
// Stata treats a missing value as larger than any number, so (unemplag > cutoff)
// alone is true whenever unemplag is missing; the !missing() guard stops rows
// with unknown unemployment from being classified as "above median".
keep if unemplag > cutoff & !missing(unemplag)

// Set the panel variables
xtset stateno year

// Run the regression weighted by test-taker sample size
ivregress 2sls z_all yrd* stated* (seppctyrlag = ticpctyrlag) [fweight=sample_size], vce(cluster stateno) first
est store HIGHUNEMP
outreg2 [HIGHUNEMP] using test_score_balanced, drop(yrd* stated*) excel dec(3)

// Clear memory and close the log opened at the top of the file
clear all

log close
